Part 2. Compare mean lobster sizes (carapace length (mm)) across the five sites for lobster observations collected in 2017.

# Create data frame filter for 5 locations and year 2017, select size (remove all -9999 values and turn data into case-format)

compare_ls <- select(size_abund,YEAR,SITE,SIZE,COUNT) %>% 
  filter(YEAR == 2017, 
         SITE=="AQUE"|SITE=="NAPL"|SITE=="MOHK"|SITE=="IVEE"|SITE=="CARP",
         SIZE!= "-99999")
#Remove multiple counts and organize into rows 
ls_comp <- uncount(compare_ls, weights = COUNT, .remove = TRUE, .id = NULL)
#Count in decimal numbers ? 

#View(counts)
View (ls_comp)
# Exploratory graphs 

hist_ls <- ggplot(ls_comp, aes(x = SIZE))+
  geom_histogram(bins = 10) +
  facet_wrap(~ SITE, scale = "free")

hist_ls

qq_ls <-ggplot(ls_comp, aes(sample = SIZE)) +
  geom_qq(bins = 10) +
  facet_wrap(~ SITE, scale = "free")
## Warning: Ignoring unknown parameters: bins
qq_ls

# Groups approx. normally distributed  

box_ls <-  ggplot(ls_comp, aes(x = SITE, y = SIZE)) +
  geom_boxplot(width = .4) +
  geom_jitter(width = 0.1, alpha = 0.5, aes(color = SITE))

box_ls

#Leven's Test 
variances_ls <- ls_comp %>% 
  group_by(SITE) %>% 
  summarize(
    mean = mean(SIZE),
    sd = sd(SIZE),
    variance = var(SIZE)
  )
View(variances_ls)

# assume equal variance 

levene_ls <- leveneTest(SIZE ~ SITE, data = ls_comp)
levene_ls
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value    Pr(>F)    
## group    4  8.3893 1.065e-06 ***
##       1663                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Ho: No difference in variances (Variances are equal)
# Ha: Variances are NOT equal

# There issignificant differences in variances between SITES ? (or use robust SE) 16  Can we still use anova if L-test not affected ? or Welsh ?  

#ANOVA One-Way 
#H0: All means are equall 
#HA: At least two mean are not equal 

ls_anova <- aov(SIZE ~ SITE, data = ls_comp)
ls_sum <- summary(ls_anova)
ls_sum
##               Df Sum Sq Mean Sq F value Pr(>F)   
## SITE           4   2355   588.6   3.424 0.0085 **
## Residuals   1663 285871   171.9                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#At least two means are NOT equal ?

# Which ones ? 
ls_tukey <- TukeyHSD(ls_anova)
ls_tukey
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = SIZE ~ SITE, data = ls_comp)
## 
## $SITE
##                 diff         lwr      upr     p adj
## CARP-AQUE -1.6657352 -6.24294710 2.911477 0.8582355
## IVEE-AQUE -2.4433772 -7.05292315 2.166169 0.5968998
## MOHK-AQUE -1.8955224 -7.02720717 3.236162 0.8514711
## NAPL-AQUE  2.3366205 -3.19311600 7.866357 0.7775633
## IVEE-CARP -0.7776420 -2.76097123 1.205687 0.8216104
## MOHK-CARP -0.2297872 -3.23309697 2.773523 0.9995765
## NAPL-CARP  4.0023556  0.36042398 7.644287 0.0228728
## MOHK-IVEE  0.5478548 -2.50450730 3.600217 0.9882889
## NAPL-IVEE  4.7799976  1.09751057 8.462485 0.0037001
## NAPL-MOHK  4.2321429 -0.08607271 8.550358 0.0579286
# Table comparing ??

stats_ls <- ls_comp %>% 
  group_by(SITE) %>% 
  summarize(
    mean = round(mean(SIZE),2),
    sd = round(sd(SIZE),2))


figure_ls <- ggplot(stats_ls, aes(x = SITE, y = mean)) +
  geom_col(colour = NA, fill = "#FF6666", width = 0.5) +
  geom_errorbar(aes(ymax = mean + sd, ymin = mean - sd), width = 0.1)+
   scale_y_continuous(expand = c(0,0), limits = c(0,120))+
    labs(y=expression(Mean~Lobster~Size~~(m)))+
  scale_x_discrete() +
  xlab("\nResearch Sites")+
   geom_signif(comparisons = list(c("NAPL","CARP")), annotations = "p = 0.023", y_position = 95, tip_length = 0.1, size = 0.5, textsize = 3)+
  geom_signif(comparisons = list(c("NAPL","IVEE")), annotations = "p = 0.004", y_position = 110, tip_length = 0.1, size = 0.5, textsize = 3)+
  theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank(), axis.line = element_line(colour = "black"),plot.title = element_text(face ="bold"))+
  ggtitle("Lobster Size Distribution at SBC LTER Sites")
  
 
 
figure_ls

#numer of samples for each site
ls_comp %>% count(SITE)
## # A tibble: 5 x 2
##   SITE      n
##   <fct> <int>
## 1 AQUE     67
## 2 CARP    705
## 3 IVEE    606
## 4 MOHK    178
## 5 NAPL    112

Figure XX. Lobsrer Size Destribution at SBC LTER SITES. Mean lobster size(mm) for locations AQUE (n=67), CARP (n=705), IVEE (n=606), MOHK (n=178), and NAPL (n=122) Santa Barbara County. Error bars indicate +/- 1 standard deviation. Brackets indicate p-values for significantly different means as determined by one-way ANOVA with Tukey’s HSD (F(3,42) = ???, p < 0.001, with \(\alpha\) = 0.05 throughout). Data source: Santa Barbara Coastal (bc.lternet.edu/cgi-bin/showDataset.cgi?docid=knb-lter-sbc.77)

Part 3: Comparing changes in lobster size at MPA and non-MPA sites (comparing only 2012 and 2017 sizes)

At Isla Vista and Naples Reef, the two protected MPA sites (with zero fishing pressure), how do lobster sizes in 2012 and 2017 compare? At the non-MPA sites?

#create dataframe to compare lobster size in 2012 and 2017 at MPA and non-MPA sites 

MPA_df <- size_abund %>%
  filter(YEAR == "2012" | YEAR == "2017",  SIZE != "-99999") %>%
  mutate(
    MPA_STATUS = case_when( 
      SITE == "IVEE" ~ "MPA",
      SITE == "NAPL" ~ "MPA",
      SITE == "AQUE" ~ "NON-MPA",
      SITE == "CARP" ~ "NON-MPA",
      SITE == "MOHK" ~ "NON-MPA")) %>%
  select(YEAR, SITE, SIZE, COUNT, MPA_STATUS)

#arrange data so that each lobster has its own row 
MPAdf <- as.data.frame(expand.dft(MPA_df, freq = "COUNT"))
     
#View(MPAdf)

#create data summary frame of lobster size median, max, mean, SD, and sample size 

MPA_table <- as.data.frame(expand.dft(size_abund, freq = "COUNT")) %>%
group_by(YEAR) %>% #group data together by same month
  summarize(
    median = round(median(SIZE),2), #find median, round to 2 digits
    max = max(SIZE), #identify maximum 
    mean = round(mean(SIZE),2), #find mean, round to 2 digits 
    stdev = round(sd(SIZE),2), #find SD, round to 2 digits
    length(SIZE)) #find sample size 

#View(MPA_table)
#Create exloratory histograms and qq plots

#histogram to compare 2012 and 2017 lobster sizes 
MPAhist <- ggplot(MPAdf, aes(x = SIZE)) +
  geom_histogram(aes(bins=15, fill = SITE)) +
                   facet_wrap(~YEAR ~MPA_STATUS, scale = "free") +
                   theme_classic()
## Warning: Ignoring unknown aesthetics: bins
MPAhist
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#qq plot to compare 2012 and 2017 lobster sizes 
MPAqq <- ggplot(MPAdf, aes(sample = SIZE)) +
  geom_qq(aes(color = SITE)) + 
  facet_wrap(~YEAR ~MPA_STATUS, scale = "free") + 
  theme_classic()

MPAqq

Part 3 question of interest:

Is there a significant difference between lobster size at each of the sites in 2012 and 2017?

#create data frames to perform statistical tests to answer the question above 

#AQUE dataframe
AQUEdf <- MPAdf %>%
  filter(SITE == "AQUE")
#View(AQUEdf)

#CARP datafram
CARPdf <- MPAdf %>%
  filter(SITE == "CARP")
#View(CARPdf)

#MOHK dataframe
MOHKdf <- MPAdf %>%
  filter(SITE == "MOHK")
#View(MOHKdf)

#IVEE dataframe
IVEEdf <- MPAdf %>%
  filter(SITE == "IVEE")
#View(IVEEdf)

#NAPL dataframe 
NAPLdf <- MPAdf %>%
  filter(SITE == "NAPL")
#View(NAPLdf)

########################

mpa_sites <- MPAdf %>%
  filter(MPA_STATUS == "MPA")

#View(mpa_sites)

nonmpa_sites <- MPAdf %>%
  filter(MPA_STATUS == "NON-MPA")

#View(nonmpa_sites)

post_sites <- MPAdf %>%
  filter(YEAR == "2017")

#View(post_sites)
#AQUE site (comparing 2012 and 2017 sizes)

#F-Test for equal variances 
#AQUE_ftest <- AQUEdf %>%
  #var.test(SIZE ~ YEAR, data = .)

#AQUE_ftest 
#RESULT: Variances are NOT equal 

#T-test 
AQUE_ttest <- AQUEdf %>%
  t.test(SIZE ~ YEAR, data =.)

#AQUE_ttest

#NOT significantly different (p-value = 0.1907)
#CARP site (comparing 2012 and 2017 sizes)

#F-Test for equal variances 
#CARP_ftest <- CARPdf %>%
  #var.test(SIZE ~ YEAR, data = .)

#CARP_ftest 
#RESULT: Variances are NOT equal  

#T-test 
CARP_ttest <- CARPdf %>%
  t.test(SIZE ~ YEAR, data =.)

#CARP_ttest

#NOT significantly different (p-value = 0.2211)
#MOHK site (comparing 2012 and 2017 sizes)

#F-Test for equal variances 
#MOHK_ftest <- MOHKdf %>%
  #var.test(SIZE ~ YEAR, data = .)

#MOHK_ftest 
#RESULT: Variances are NOT equal  

#T-test 
MOHK_ttest <- MOHKdf %>%
  t.test(SIZE ~ YEAR, data =.)

#MOHK_ttest

#SIGNIFICANTLY DIFFERENT (p-value = 0.0001599)
#IVEE site (comparing 2012 and 2017 sizes)

#F-Test for equal variances 
#IVEE_ftest <- IVEEdf %>%
  #var.test(SIZE ~ YEAR, data = .)

#IVEE_ftest 
#RESULT: Variances are NOT equal  

#T-test 
IVEE_ttest <- IVEEdf %>%
  t.test(SIZE ~ YEAR, data =.)

#IVEE_ttest

#SIGNIFICANTLY DIFFERENT (p-value = 0.0361)
#NAPL site (comparing 2012 and 2017 sizes)

#F-Test for equal variances 
#NAPL_ftest <- NAPLdf %>%
  #var.test(SIZE ~ YEAR, data = .)

#NAPL_ftest 
#RESULT: Variances are NOT equal  

#T-test 
NAPL_ttest <- NAPLdf %>%
  t.test(SIZE ~ YEAR, data =.)

#NAPL_ttest

#NOT significantly different (p-value = 0.5373)
####
#Comparing lobster size at ALL MPA sites (IVEE and NAPL) in 2012 and 2017
####

#F Test for equal variance 
#H0: Ratio of variances is = 1 
#HA: Ratio of variances is NOT = 1

mpa_ftest <- mpa_sites %>%
  var.test(SIZE ~ YEAR, data = .)

mpa_ftest
## 
##  F test to compare two variances
## 
## data:  SIZE by YEAR
## F = 0.75323, num df = 31, denom df = 717, p-value = 0.3346
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.477719 1.341900
## sample estimates:
## ratio of variances 
##          0.7532319
# Test determined variances are NOT equal 

#Two sample two sided T-Test
#H0: The difference between the means = 0
#HA: The difference between the means is NOT = 0

mpa_ttest <- mpa_sites %>%
  t.test(SIZE ~ YEAR, data =., var.equal = TRUE)

mpa_ttest
## 
##  Two Sample t-test
## 
## data:  SIZE by YEAR
## t = -1.9159, df = 748, p-value = 0.05576
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -9.7644724  0.1189292
## sample estimates:
## mean in group 2012 mean in group 2017 
##           67.37500           72.19777
#NOT SIGNIFICANTLY DIFFERENT (p-value = 0.05576)
###
#Comparing lobster size at ALL non-MPA sites (CARP, MOHK, and AQUE) in 2012 and 2017
###

#F Test for equal variance 
#H0: Ratio of variances is = 1 
#HA: Ratio of variances is NOT = 1

nonmpa_ftest <- nonmpa_sites %>%
  var.test(SIZE ~ YEAR, data = .)

nonmpa_ftest
## 
##  F test to compare two variances
## 
## data:  SIZE by YEAR
## F = 0.99085, num df = 198, denom df = 949, p-value = 0.953
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.8037718 1.2406929
## sample estimates:
## ratio of variances 
##          0.9908519
# Test determined variances are NOT equal 

#Two sample two sided T-Test
#H0: The difference between the means = 0
#HA: The difference between the means is NOT = 0

nonmpa_ttest <- nonmpa_sites %>%
  t.test(SIZE ~ YEAR, data =., var.equal = TRUE)

nonmpa_ttest
## 
##  Two Sample t-test
## 
## data:  SIZE by YEAR
## t = 2.6973, df = 1147, p-value = 0.007093
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.7143078 4.5265173
## sample estimates:
## mean in group 2012 mean in group 2017 
##           74.92462           72.30421
#SIGNIFICANTLY DIFFERENT (p-value = 0.007093)
###
#Comparing lobster size at MPA vs. Non-MPA sites in 2017
###
#Create finalized data table 

sizesum <- MPAdf %>%
  group_by(SITE, YEAR) %>%
  summarize(
    mean = round(mean(SIZE), 1),
    stdev = round(mean(SIZE), 1),
    length(SIZE))
  
sizesum
## # A tibble: 10 x 5
## # Groups:   SITE [?]
##    SITE   YEAR  mean stdev `length(SIZE)`
##    <fct> <int> <dbl> <dbl>          <int>
##  1 AQUE   2012  71    71               38
##  2 AQUE   2017  73.9  73.9             67
##  3 CARP   2012  74.4  74.4             78
##  4 CARP   2017  72.2  72.2            705
##  5 IVEE   2012  66.1  66.1             26
##  6 IVEE   2017  71.5  71.5            606
##  7 MOHK   2012  77.3  77.3             83
##  8 MOHK   2017  72    72              178
##  9 NAPL   2012  73    73                6
## 10 NAPL   2017  76.2  76.2            112
kable(sizesum, col.names = c("Site", "Year", "Mean (mm)", "SD (mm)", "sample size"), caption = "**Table 1**. California spiny lobster (*Panulirus interruptus*) carapace length (mm) at five Santa Barbara Channel reefs in 2012 and 2017. Source: Santa Barbara Coastal Long Term Ecological Research") %>%
  kable_styling(bootstrap_options = c("striped"), full_width = F) %>%
  collapse_rows(columns = 1)
Table 1. California spiny lobster (Panulirus interruptus) carapace length (mm) at five Santa Barbara Channel reefs in 2012 and 2017. Source: Santa Barbara Coastal Long Term Ecological Research
Site Year Mean (mm) SD (mm) sample size
AQUE 2012 71.0 71.0 38
2017 73.9 73.9 67
CARP 2012 74.4 74.4 78
2017 72.2 72.2 705
IVEE 2012 66.1 66.1 26
2017 71.5 71.5 606
MOHK 2012 77.3 77.3 83
2017 72.0 72.0 178
NAPL 2012 73.0 73.0 6
2017 76.2 76.2 112